# Tutorial 3: Application on SeqFISH mouse embryo dataset. In this vignette, We performed `PROST` on the processed mouse embryo ST data from [(Lohoff, T. et al. 2022)](https://doi.org/10.1038/s41587-021-01006-2) generated by [SeqFISH](https://spatial.caltech.edu/seqfish/) to evaluate its general applicability. The [original data](https://marionilab.cruk.cam.ac.uk/SpatialMouseAtlas/) can be downloaded from [google drive](https://drive.google.com/drive/folders/1r5Cuo4YqZVtFPpB-VqDoHkMW6vhl9glf?usp=share_link). --- ## Identify SVGs ### 1.Load PROST and its dependent packages import pandas as pd import numpy as np import scanpy as sc import os import warnings warnings.filterwarnings("ignore") import matplotlib as mpl import matplotlib.pyplot as plt import PROST PROST.__version__ >>> ' 1.1.2 ' ### 2.Set up the working environment and import data # the location of R (used for the mclust clustering) ENVpath = "your path of PROST_ENV" # refer to 'How to use PROST' section os.environ['R_HOME'] = f'{ENVpath}/lib/R' os.environ['R_USER'] = f'{ENVpath}/lib/python3.7/site-packages/rpy2' # init SEED = 818 PROST.setup_seed(SEED) # Set directory (If you want to use additional data, please change the file path) rootdir = 'datasets/SeqFISH/' input_dir = os.path.join(rootdir) output_dir = os.path.join(rootdir,'results/') if not os.path.isdir(output_dir): os.makedirs(output_dir) # Read counts and metadata counts = pd.read_csv(input_dir + "counts.txt", sep = "\t") metadata = pd.read_csv(input_dir + "metadata.txt", sep = "\t") gene_name = counts.index # Create anndata for embryo1 (embryo2 or embryo3) embryo = 1 # Read data metadata = metadata[metadata["embryo"]==f"embryo{embryo}"] counts = counts.loc[:,metadata["uniqueID"]] spatial = metadata[["x_global","y_global"]] spatial.index = metadata["uniqueID"] # Create anndata adata = sc.AnnData(counts.T) adata.var_names_make_unique() # read spatial adata.obsm["spatial"] = spatial.to_numpy() # read annotation annotation = metadata["celltype_mapped_refined"] annotation.index = metadata["uniqueID"] adata.obs["annotation"] = annotation # save data adata.write_h5ad(output_dir+f"/used_data_embryo{embryo}.h5") ### 3.Calculate and save PI adata = sc.read(output_dir+f"/used_data_embryo{embryo}.h5") adata = PROST.prepare_for_PI(adata, percentage = 0.01, platform="SeqFISH") adata = PROST.cal_PI(adata, kernel_size=8, platform="SeqFISH",del_rate=0.05) # Hypothesis test ''' PROST.spatial_autocorrelation(adata, k = 10, permutations = None) ''' # Save PI results adata.write_h5ad(output_dir+f"/PI_result_embryo{embryo}.h5") >>> Filtering genes ... >>> Trying to set attribute `.var` of view, copying. >>> Normalization to each gene: >>> 100%|██████████| 351/351 [00:00<00:00, 5237.45it/s] >>> Gaussian filtering for each gene: >>> 100%|██████████| 351/351 [00:40<00:00, 8.67it/s] >>> Binary segmentation for each gene: >>> 100%|██████████| 351/351 [00:00<00:00, 18470.16it/s] >>> Spliting subregions for each gene: >>> 100%|██████████| 351/351 [00:00<00:00, 8355.38it/s] >>> Computing PROST Index for each gene: >>> 100%|██████████| 351/351 [00:39<00:00, 8.99it/s] >>> PROST Index calculation completed !! ### 4.Draw SVGs detected by PI PROST.plot_gene(adata, platform="SeqFISH", size = 0.3, top_n = 25, ncols_each_sheet = 5, nrows_each_sheet = 5,save_path = output_dir) >>> Drawing pictures: >>> 100%|██████████| 1/1 [00:15<00:00, 15.74s/it] >>> Drawing completed !! ![SeqFish_mouse_embryo_svgs](./_images/SeqFish/SeqFish_mouse_embryo_svgs.png "Draw SVGs detected by PI") --- ## Clustering # Set the number of clusters n_clusters = 24 ### 1.Read PI result and Expression data preprocessing PROST.setup_seed(SEED) # Read PI result adata = sc.read(output_dir+f"/PI_result_embryo{embryo}.h5") sc.pp.normalize_total(adata) sc.pp.log1p(adata) ### 2.Run PROST clustering PROST.run_PNN(adata, adj_mode = "distance", min_distance = 3, init="mclust", n_clusters = n_clusters, SEED=SEED, post_processing = False, cuda = False) >>> Calculating adjacency matrix ... >>> Running PCA ... >>> Laplacian Smoothing ... >>> Initializing cluster centers with mclust, n_clusters known >>> Epoch: : 501it [3:17:42, 23.68s/it, loss=0.28359604] >>> Clustering completed !! ### 3.Save result adata.write_h5ad(output_dir + f"/PNN_result_embryo{embryo}.h5") clustering = adata.obs["clustering"] clustering.to_csv(output_dir + f"/clusters_embryo{embryo}.csv",header = False) embedding = adata.obsm["PROST"] np.savetxt(output_dir + f"/embedding_embryo{embryo}.txt",embedding) ### 4.Plot annotation plt.rcParams["figure.figsize"] = (5,5) ax = sc.pl.embedding(adata, basis="spatial", color="annotation",size = 7,s=6, show=False, title='annotation') ax.invert_yaxis() plt.axis('off') plt.savefig(output_dir+f"/annotation_embryo{embryo}.png", dpi=600, bbox_inches='tight') ![annotation results](./_images/SeqFish/SeqFish_mouse_embryo_annotation.png "annotation results") ### 5.Plot clustering result plt.rcParams["figure.figsize"] = (5,5) ax = sc.pl.embedding(adata, basis="spatial", color="clustering",size = 7,s=6, show=False, title='clustering') ax.invert_yaxis() plt.axis('off') plt.savefig(output_dir+"/clustering.png", dpi=600, bbox_inches='tight') ![SeqFish_mouse_embryo_clusterresult](./_images/SeqFish/SeqFish_mouse_embryo_clusterresult.png "SeqFish_mouse_embryo_umap_result") ---